In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): besides selecting the inline plotting backend, %pylab is
# documented to star-import numpy/matplotlib names into the notebook namespace,
# on top of the explicit imports above. `%matplotlib inline` would give inline
# figures without that -- confirm no other cell relies on bare pylab names
# before switching.
%pylab inline
# Show the installed pandas version; the author notes that 0.14.0 is the
# minimum needed for the MultiIndex slicing used in this notebook.
pd.__version__ # need 0.14.0 for multiindex slicing
Out[2]:
In [3]:
# Overall statistics: the file is in long form (K, M, STATISTIC, VALUE).
# Pivot STATISTIC into columns, then keep only the (K=10, M=200) row.
o = (
    pd.read_table("overall_statistics_ksmall.txt")
    .set_index(["K", "M", "STATISTIC"])["VALUE"]
    .unstack()
    .loc[(10, 200), :]
)

# Per-variable statistics: same long form with an extra VARIABLE key.
# Both VARIABLE and STATISTIC are pivoted into the columns so that the
# (K=10, M=200) row can be selected; the final unstack turns that row back
# into a table with one row per variable and one column per statistic.
v = (
    pd.read_table("variable_statistics_ksmall.txt")
    .set_index(["K", "M", "STATISTIC", "VARIABLE"])["VALUE"]
    .unstack()
    .unstack()
    .loc[(10, 200), :]
    .unstack()
)
In [4]:
# Statistic columns displayed in the per-variable ranking tables.
statistics_of_interest = [
    "rms_error",
    "max_error",
    "precisionbits",
    "srr",
    "correlation",
]
In [5]:
# Variable metadata, used to join descriptive columns (e.g. levels) onto the
# per-variable statistics. The file is in long form (VARIABLE, INFO, VALUE);
# pivot INFO into columns and keep the VALUE block, giving one row per variable.
v_info = (
    pd.read_table("variable_information.txt")
    .set_index(["VARIABLE", "INFO"])
    .unstack()
    .loc[:, "VALUE"]
)
# Cast the `levels` column to integers after the pivot.
v_info["levels"] = v_info["levels"].astype("int")
# Blank the columns' axis name left behind by the pivot so it is not displayed.
v_info.columns.name = ""
In [8]:
# Five variables with the largest rms_error (the sort is ascending, so they are
# the last five rows), joined with the `name` column of v_info.
# DataFrame.sort() no longer exists in current pandas; sort_values() is the
# replacement (requires pandas >= 0.17).
(
    v.sort_values("rms_error")[["rms_error", "max_error", "precisionbits", "srr"]]
    .join(v_info["name"])
    .tail(5)
)
Out[8]:
In [ ]:
# Ten variables with the largest rms_error (ascending sort, last ten rows),
# restricted to the statistics of interest and joined with all of v_info.
# DataFrame.sort() no longer exists in current pandas; sort_values() is the
# replacement (requires pandas >= 0.17).
(
    v.sort_values("rms_error")[statistics_of_interest]
    .join(v_info)
    .tail(10)
)
In [ ]:
# Ten variables with the smallest max_error (ascending sort, first ten rows),
# restricted to the statistics of interest and joined with all of v_info.
# DataFrame.sort() no longer exists in current pandas; sort_values() is the
# replacement (requires pandas >= 0.17).
(
    v.sort_values("max_error")[statistics_of_interest]
    .join(v_info)
    .head(10)
)
In [ ]:
# Ten variables with the largest max_error (ascending sort, last ten rows),
# restricted to the statistics of interest and joined with all of v_info.
# DataFrame.sort() no longer exists in current pandas; sort_values() is the
# replacement (requires pandas >= 0.17).
(
    v.sort_values("max_error")[statistics_of_interest]
    .join(v_info)
    .tail(10)
)
In [ ]:
# Ratio of max_error to rms_error for each variable, stored on `v` as a new
# column.
v["error_ratio"] = v.max_error / v.rms_error
# Select just that column along axis 1 and sort it ascending.
# DataFrame.sort() no longer exists in current pandas; sort_values() is the
# replacement (requires pandas >= 0.17).
er = v.loc(axis=1)[("error_ratio",)].sort_values("error_ratio")
# Mean and median of the ratio, plus the five smallest and five largest values.
(er.mean(), er.median(), er.head(5), er.tail(5))
In [ ]:
# max_error and rms_error for four hand-picked variables, transposed so the
# variables become the columns and the two statistics become the rows.
v.loc[
    ["U", "FSDSC", "Z3", "CCN3"],
    ["max_error", "rms_error"],
].transpose()